import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import datetime
# Work from the directory that holds the fast-response SF6 (TGA) CSV files.
pwd = '/Users/junyuyao/Desktop/德州奥斯汀/G14 summer/cs data research/JointUrban2003/fast response sf6/TGA/'
os.chdir(pwd)

# The CSV files carry no header row, so the column names are supplied here.
tga_column_names = [
    'Day_of_Year',
    'UTC_Hours',
    'UTC_Minutes',
    'UTC_seconds',
    'IOP_number',
    'van_number',
    'pass_number',
    'hours_in_CDT_day_start',
    'latitude',
    'longitude',
    'altitude',
    'number_of_satellites',
    'hdop',
    'concentration_of_SF6',
    'QC_flag',
]

# Load one sample file (IOP 2, release 1, van 0) to inspect its layout.
df = pd.read_csv('I02R1V0.CSV', names=tga_column_names)
df
| Day_of_Year | UTC_Hours | UTC_Minutes | UTC_seconds | IOP_number | van_number | pass_number | hours_in_CDT_day_start | latitude | longitude | altitude | number_of_satellites | hdop | concentration_of_SF6 | QC_flag | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 183 | 14 | 0 | 0.0 | 2 | 0 | -1 | 9.000000 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 1 | 183 | 14 | 0 | 0.5 | 2 | 0 | -1 | 9.000139 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 2 | 183 | 14 | 0 | 1.0 | 2 | 0 | -1 | 9.000278 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 3 | 183 | 14 | 0 | 1.5 | 2 | 0 | -1 | 9.000417 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 4 | 183 | 14 | 0 | 2.0 | 2 | 0 | -1 | 9.000556 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10796 | 183 | 15 | 29 | 58.0 | 2 | 0 | -1 | 10.499444 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 10797 | 183 | 15 | 29 | 58.5 | 2 | 0 | -1 | 10.499583 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 10798 | 183 | 15 | 29 | 59.0 | 2 | 0 | -1 | 10.499722 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 10799 | 183 | 15 | 29 | 59.5 | 2 | 0 | -1 | 10.499861 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
| 10800 | 183 | 15 | 30 | 0.0 | 2 | 0 | -1 | 10.500000 | 35.469797 | -97.5147 | 353 | -1 | 0.0 | -999.0 | 5 |
10801 rows × 15 columns
# Plot the SF6 time series (shifted signal and its 200-sample moving average)
# for every stationary TGA van file found in the current working directory.
tga_columns = [
    'Day_of_Year',
    'UTC_Hours',
    'UTC_Minutes',
    'UTC_seconds',
    'IOP_number',
    'van_number',
    'pass_number',
    'hours_in_CDT_day_start',
    'latitude',
    'longitude',
    'altitude',
    'number_of_satellites',
    'hdop',
    'concentration_of_SF6',
    'QC_flag',
]
for xx in range(1, 11):
    # File names use a two-digit IOP number: I01 ... I09, then I10.
    ns = 'I0'
    if xx == 10:
        ns = 'I'
    for y in range(1, 5):
        for z in range(0, 10):
            # Van 5 is excluded (the original notes call it the moving van);
            # skip it before touching the disk instead of reading it first.
            if z == 5:
                continue
            file_name = ns + str(xx) + 'R' + str(y) + 'V' + str(z) + '.CSV'
            # Not every IOP/release/van combination has a file.
            if not os.path.exists(file_name):
                continue
            # The CSV files have no header row, so names are supplied.
            df = pd.read_csv(file_name, names=tga_columns)
            # An empty file has no first sample to use as the time origin.
            if df.empty:
                continue
            # Minutes elapsed since the first sample of this file.
            k = df['hours_in_CDT_day_start'][0]
            df['Time_in_mins'] = (df['hours_in_CDT_day_start'] - k) * 60
            # Shift by 999 so a raw value of -999 (presumably the
            # missing-data placeholder -- confirm) becomes 0.
            df['cleaned_concertration'] = df['concentration_of_SF6'] + 999
            # Nothing to plot when the whole file is placeholder values.
            if (df['cleaned_concertration'] == 0).all():
                continue
            # Keep only vans whose GPS position never changes. The previous
            # test `la and lo and al == False` parsed as
            # `la and lo and (al == False)`, so vans whose latitude or
            # longitude changed were still plotted.
            position_fixed = all(
                (df[axis] == df[axis][0]).all()
                for axis in ('latitude', 'longitude', 'altitude')
            )
            if not position_fixed:
                continue
            # Separate frame for plotting, with a 200-sample moving average.
            df1 = df[['cleaned_concertration', 'Time_in_mins']].copy()
            df1['SMA200'] = df1['cleaned_concertration'].rolling(200).mean()
            # The first 199 rows have no moving average; drop them.
            df1.dropna(inplace=True)
            plt.figure(figsize=(20, 5))
            plt.plot(df1['Time_in_mins'], df1['cleaned_concertration'],
                     label='concentration_of_SF6')
            plt.plot(df1['Time_in_mins'], df1['SMA200'], label='SMA200')
            plt.xlabel('Time_in_mins')
            plt.ylabel('concentration_of_SF6')
            plt.legend()
            # Use the real file name; a hard-coded 'I0' prefix mislabelled
            # IOP 10 as 'I010...'.
            plt.title(file_name)
            plt.show()
# Build TGA_df: one row per timestamp of each IOP/release, with one shifted
# SF6 concentration column per van (van 5 excluded).
tga_columns = [
    'Day_of_Year',
    'UTC_Hours',
    'UTC_Minutes',
    'UTC_seconds',
    'IOP_number',
    'van_number',
    'pass_number',
    'hours_in_CDT_day_start',
    'latitude',
    'longitude',
    'altitude',
    'number_of_satellites',
    'hdop',
    'concentration_of_SF6',
    'QC_flag',
]
timestamp_columns = ['Day_of_Year', 'UTC_Hours', 'UTC_Minutes', 'UTC_seconds']

# Per-release frames are collected and concatenated once at the end, which
# avoids re-copying the growing result on every iteration.
release_frames = []
for xx in range(1, 11):
    # File names use a two-digit IOP number: I01 ... I09, then I10.
    ns = 'I0'
    if xx == 10:
        ns = 'I'
    for y in range(1, 5):
        # Reset for every release so a missing van-0 file (or a release with
        # no files at all) can never reuse the previous release's data.
        dff = None
        for z in range(0, 10):
            # Van 5 is excluded (the original notes call it the moving van);
            # skip it before reading the file.
            if z == 5:
                continue
            file_name = ns + str(xx) + 'R' + str(y) + 'V' + str(z) + '.CSV'
            # Not every IOP/release/van combination has a file.
            if not os.path.exists(file_name):
                continue
            # The CSV files have no header row, so names are supplied.
            df = pd.read_csv(file_name, names=tga_columns)
            # Shift by 999 so a raw value of -999 (presumably the
            # missing-data placeholder -- confirm) becomes 0.
            vn = 'concentration_van' + str(z)
            df[vn] = df['concentration_of_SF6'] + 999
            if dff is None:
                # The first van found for this release supplies timestamps.
                dff = df[timestamp_columns + [vn]]
            else:
                # Later vans contribute only their concentration column,
                # aligned on the row index.
                dff = pd.concat([dff, df[[vn]]], axis=1)
        if dff is not None:
            release_frames.append(dff)

# Stack all releases; fall back to an empty frame when no file was found.
TGA_df = pd.concat(release_frames) if release_frames else pd.DataFrame()
TGA_df
| Day_of_Year | UTC_Hours | UTC_Minutes | UTC_seconds | concentration_van0 | concentration_van1 | concentration_van2 | concentration_van3 | concentration_van4 | concentration_van6 | concentration_van7 | concentration_van8 | concentration_van9 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 180 | 14 | 0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.6 | 0.0 | 0.0 | 0.0 | 0.0 |
| 1 | 180 | 14 | 0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.5 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2 | 180 | 14 | 0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 |
| 3 | 180 | 14 | 0 | 1.5 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 |
| 4 | 180 | 14 | 0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1024.9 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 10796 | 210 | 9 | 29 | 58.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10797 | 210 | 9 | 29 | 58.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10798 | 210 | 9 | 29 | 59.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10799 | 210 | 9 | 29 | 59.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 10800 | 210 | 9 | 30 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
432040 rows × 13 columns
# Assemble a "<year><day-of-year> H:M:S.f" string per row and parse it.
# The year 2003 is hard-coded; day-of-year and UTC clock come from the data.
year_and_day = (2003 * 1000 + TGA_df['Day_of_Year']).astype(str)
utc_clock = (
    TGA_df['UTC_Hours'].astype(str)
    + ':' + TGA_df['UTC_Minutes'].astype(str)
    + ':' + TGA_df['UTC_seconds'].astype(str)
)
TGA_df['datetime'] = pd.to_datetime(
    year_and_day + " " + utc_clock,
    format='%Y%j %H:%M:%S.%f',
)

# Move the old row labels into an 'index' column, index by timestamp, and
# order the rows chronologically.
TGA_df = (
    TGA_df
    .reset_index(level=0)
    .set_index('datetime')
    .sort_values(['datetime'])
)

# The timestamp parts and the old row labels are now redundant.
redundant_columns = [
    'Day_of_Year',
    'UTC_Hours',
    'UTC_Minutes',
    'UTC_seconds',
    'index',
]
Final_TGA_df = TGA_df.drop(redundant_columns, axis=1)
Final_TGA_df
| concentration_van0 | concentration_van1 | concentration_van2 | concentration_van3 | concentration_van4 | concentration_van6 | concentration_van7 | concentration_van8 | concentration_van9 | |
|---|---|---|---|---|---|---|---|---|---|
| datetime | |||||||||
| 2003-06-29 14:00:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.6 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-06-29 14:00:00.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.5 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-06-29 14:00:01.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-06-29 14:00:01.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-06-29 14:00:02.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1024.9 | 0.0 | 0.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2003-07-29 09:29:58.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-07-29 09:29:58.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-07-29 09:29:59.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-07-29 09:29:59.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 2003-07-29 09:30:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
432040 rows × 9 columns
# Switch to the directory that holds the profile (weather) data.
pwd = '/Users/junyuyao/Desktop/德州奥斯汀/G14 summer/cs data research/JointUrban2003/Profile Data/'
os.chdir(pwd)

# Load the weather workbook (first sheet, pandas defaults).
weather_workbook = 'weather data.xlsx'
weather_df = pd.read_excel(weather_workbook)
weather_df
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 3 | 2003180.0 | 14:00:00 | 2.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 |
| 4 | 2003180.0 | 14:00:00 | 4.0 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115722 | 2003210.0 | 09:01:00 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115723 | 2003210.0 | 09:01:00 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115724 | 2003210.0 | 09:01:00 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115725 | 2003210.0 | 09:01:00 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115726 | 2003210.0 | 09:01:00 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 |
115727 rows × 11 columns
# Discard rows in which every column is missing, then renumber from 0.
weather_df1 = weather_df.dropna(how='all').reset_index(drop=True)
weather_df1
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 1 | 2003180.0 | 14:00:00 | 2.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 |
| 2 | 2003180.0 | 14:00:00 | 4.0 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 |
| 3 | 2003180.0 | 14:00:00 | 6.0 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 |
| 4 | 2003180.0 | 14:00:00 | 8.0 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115652 | 2003210.0 | 09:01:00 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115653 | 2003210.0 | 09:01:00 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115654 | 2003210.0 | 09:01:00 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115655 | 2003210.0 | 09:01:00 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115656 | 2003210.0 | 09:01:00 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 |
115657 rows × 11 columns
# Check which Python type the time column holds (look at the first row).
first_time_value = weather_df1['HH:MM:SS.s(UTC)'][0]
type(first_time_value)
datetime.time
# Parse the "<year><day-of-year>" value into a calendar date (midnight).
julian_date = weather_df1['Julian (Date)']
weather_df1['datetime'] = pd.to_datetime(julian_date, format='%Y%j')
weather_df1
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | datetime | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 | 2003-06-29 |
| 1 | 2003180.0 | 14:00:00 | 2.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 | 2003-06-29 |
| 2 | 2003180.0 | 14:00:00 | 4.0 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 |
| 3 | 2003180.0 | 14:00:00 | 6.0 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 |
| 4 | 2003180.0 | 14:00:00 | 8.0 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 | 2003-06-29 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115652 | 2003210.0 | 09:01:00 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 |
| 115653 | 2003210.0 | 09:01:00 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 |
| 115654 | 2003210.0 | 09:01:00 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 |
| 115655 | 2003210.0 | 09:01:00 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 |
| 115656 | 2003210.0 | 09:01:00 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 |
115657 rows × 12 columns
# Convert the clock-time column to a time-of-day offset by parsing it as a
# timestamp and subtracting that timestamp's own midnight.
parsed_clock = pd.to_datetime(weather_df1['HH:MM:SS.s(UTC)'], format='%H:%M:%S')
time_of_day = parsed_clock - parsed_clock.dt.normalize()

# Add the per-row elapsed seconds to that offset.
elapsed = pd.to_timedelta(weather_df1['TIME (sec)'], unit='s')
weather_df1['TIME'] = time_of_day + elapsed

# Preview without the clock-time column (weather_df1 itself is unchanged).
weather_df1.drop(columns=['HH:MM:SS.s(UTC)'])
| Julian (Date) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | datetime | TIME | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2003180.0 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 | 2003-06-29 | 0 days 14:00:00 |
| 1 | 2003180.0 | 2.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 | 2003-06-29 | 0 days 14:00:02 |
| 2 | 2003180.0 | 4.0 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 | 0 days 14:00:04 |
| 3 | 2003180.0 | 6.0 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 | 0 days 14:00:06 |
| 4 | 2003180.0 | 8.0 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 | 2003-06-29 | 0 days 14:00:08 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115652 | 2003210.0 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 | 0 days 10:46:24 |
| 115653 | 2003210.0 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 | 0 days 10:46:26 |
| 115654 | 2003210.0 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 | 0 days 10:46:28 |
| 115655 | 2003210.0 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 | 0 days 10:46:30 |
| 115656 | 2003210.0 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 | 0 days 10:46:32 |
115657 rows × 12 columns
# Full timestamp = calendar date (midnight) + time-of-day offset.
calendar_date = weather_df1['datetime']
time_offset = weather_df1['TIME']
weather_df1['datetime'] = calendar_date + time_offset
weather_df1
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | datetime | TIME | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 | 2003-06-29 14:00:00 | 0 days 14:00:00 |
| 1 | 2003180.0 | 14:00:00 | 2.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 | 2003-06-29 14:00:02 | 0 days 14:00:02 |
| 2 | 2003180.0 | 14:00:00 | 4.0 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 14:00:04 | 0 days 14:00:04 |
| 3 | 2003180.0 | 14:00:00 | 6.0 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 14:00:06 | 0 days 14:00:06 |
| 4 | 2003180.0 | 14:00:00 | 8.0 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 | 2003-06-29 14:00:08 | 0 days 14:00:08 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115652 | 2003210.0 | 09:01:00 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:24 | 0 days 10:46:24 |
| 115653 | 2003210.0 | 09:01:00 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:26 | 0 days 10:46:26 |
| 115654 | 2003210.0 | 09:01:00 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:28 | 0 days 10:46:28 |
| 115655 | 2003210.0 | 09:01:00 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:30 | 0 days 10:46:30 |
| 115656 | 2003210.0 | 09:01:00 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:32 | 0 days 10:46:32 |
115657 rows × 13 columns
# These columns are now folded into 'datetime' and no longer needed.
consumed_columns = ['Julian (Date)', 'HH:MM:SS.s(UTC)', 'TIME (sec)', 'TIME']
weather_df1 = weather_df1.drop(consumed_columns, axis=1)
weather_df1
| HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | datetime | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 | 2003-06-29 14:00:00 |
| 1 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 | 2003-06-29 14:00:02 |
| 2 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 14:00:04 |
| 3 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 | 2003-06-29 14:00:06 |
| 4 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 | 2003-06-29 14:00:08 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115652 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:24 |
| 115653 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:26 |
| 115654 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:28 |
| 115655 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:30 |
| 115656 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 | 2003-07-29 10:46:32 |
115657 rows × 9 columns
# Display the combined timestamp column.
weather_df1.loc[:, 'datetime']
0 2003-06-29 14:00:00
1 2003-06-29 14:00:02
2 2003-06-29 14:00:04
3 2003-06-29 14:00:06
4 2003-06-29 14:00:08
...
115652 2003-07-29 10:46:24
115653 2003-07-29 10:46:26
115654 2003-07-29 10:46:28
115655 2003-07-29 10:46:30
115656 2003-07-29 10:46:32
Name: datetime, Length: 115657, dtype: datetime64[ns]
# Index the weather rows by timestamp; the old row numbers are discarded.
weather_df1 = weather_df1.set_index('datetime')
weather_df1
| HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|
| datetime | ||||||||
| 2003-06-29 14:00:00 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 2003-06-29 14:00:02 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 |
| 2003-06-29 14:00:04 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 |
| 2003-06-29 14:00:06 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 |
| 2003-06-29 14:00:08 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2003-07-29 10:46:24 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 |
| 2003-07-29 10:46:26 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 |
| 2003-07-29 10:46:28 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 |
| 2003-07-29 10:46:30 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 |
| 2003-07-29 10:46:32 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 |
115657 rows × 8 columns
# Save the timestamp-indexed weather data as-is (placeholders included).
weather_df1.to_csv('pnnl_weatehr_data.csv')

# Turn missing-value placeholders into real NaNs. np.nan is passed
# explicitly because replace(x, None) is version-dependent in pandas: older
# releases treat it as a pad/forward-fill, newer ones insert a literal None
# and turn the column into object dtype.
# -999.9 is the placeholder seen in the wind columns; RH(%) has a minimum of
# -999.0 in describe(), which is treated as a placeholder too
# (NOTE(review): confirm against the data set documentation).
weather_df1 = weather_df1.replace([-999.9, -999.0], np.nan)

# Left-join the weather rows onto the TGA timestamps.
combined_df = Final_TGA_df.join(weather_df1)
combined_df
| concentration_van0 | concentration_van1 | concentration_van2 | concentration_van3 | concentration_van4 | concentration_van6 | concentration_van7 | concentration_van8 | concentration_van9 | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | |||||||||||||||||
| 2003-06-29 14:00:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.6 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 2003-06-29 14:00:00.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.5 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:01.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:01.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:02.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1024.9 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2003-07-29 09:29:58.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5387.0 | 508.1 | 266.4 | 323.2 | 11.0 | 0.5 | -999.9 | -999.9 |
| 2003-07-29 09:29:58.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:29:59.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:29:59.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:30:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5391.0 | 507.9 | 266.4 | 323.3 | 11.0 | 0.5 | -999.9 | -999.9 |
432040 rows × 17 columns
# Summary statistics, one row per column.
combined_df.describe().transpose()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| concentration_van0 | 432040.0 | 1311.294700 | 3689.729434 | 0.0 | 0.0 | 0.0 | 1050.000 | 27848.5 |
| concentration_van1 | 432040.0 | 1120.203635 | 2372.694842 | 0.0 | 0.0 | 0.0 | 1110.325 | 16544.5 |
| concentration_van2 | 432040.0 | 756.806867 | 1797.131831 | 0.0 | 0.0 | 0.0 | 1006.100 | 14497.2 |
| concentration_van3 | 432040.0 | 1315.709074 | 2559.567178 | 0.0 | 0.0 | 0.0 | 1054.400 | 19173.3 |
| concentration_van4 | 432040.0 | 2263.084441 | 5303.310272 | 0.0 | 0.0 | 0.0 | 1234.900 | 30530.2 |
| concentration_van6 | 432040.0 | 2074.409610 | 4963.335336 | 0.0 | 0.0 | 0.0 | 1156.500 | 25039.5 |
| concentration_van7 | 432040.0 | 2134.499303 | 4869.897250 | 0.0 | 0.0 | 0.0 | 1214.200 | 27485.4 |
| concentration_van8 | 432040.0 | 1331.059029 | 2500.172832 | 0.0 | 0.0 | 0.0 | 1110.000 | 20036.3 |
| concentration_van9 | 432040.0 | 903.633411 | 2396.595616 | 0.0 | 0.0 | 0.0 | 1002.400 | 28821.7 |
| HGHT(m) | 74770.0 | 4516.052789 | 3572.363329 | 0.0 | 1824.0 | 3606.0 | 6107.750 | 29153.0 |
| PRESS(hPa) | 74770.0 | 611.000861 | 222.190875 | 22.9 | 464.5 | 637.3 | 789.100 | 978.5 |
| Temp(K) | 74770.0 | 272.504809 | 24.342725 | 196.0 | 262.3 | 278.6 | 290.800 | 405.6 |
| Theta(K) | 74770.0 | 320.758762 | 23.728325 | -999.9 | 311.5 | 316.7 | 326.900 | 764.5 |
| RH(%) | 74770.0 | -40.991320 | 280.819979 | -999.0 | 14.0 | 39.0 | 57.000 | 100.0 |
| MXRAT(g/kg) | 74770.0 | 5.019655 | 25.483241 | -999.9 | 0.7 | 4.4 | 9.200 | 97.4 |
| WSPD(m/s) | 74770.0 | -929.370206 | 257.184129 | -999.9 | -999.9 | -999.9 | -999.900 | 12.9 |
| WDIR(Deg) | 74770.0 | -914.774602 | 311.682985 | -999.9 | -999.9 | -999.9 | -999.900 | 360.0 |
# Number of missing values in each column.
combined_df.isnull().sum()
concentration_van0 0 concentration_van1 0 concentration_van2 0 concentration_van3 0 concentration_van4 0 concentration_van6 0 concentration_van7 0 concentration_van8 0 concentration_van9 0 HGHT(m) 357270 PRESS(hPa) 357270 Temp(K) 357270 Theta(K) 357270 RH(%) 357270 MXRAT(g/kg) 357270 WSPD(m/s) 357270 WDIR(Deg) 357270 dtype: int64
# Turn missing-value placeholders into real NaNs. np.nan is passed
# explicitly because replace(x, None) is version-dependent in pandas: older
# releases treat it as a pad/forward-fill, newer ones insert a literal None
# and turn the column into object dtype.
# -999.9 is the placeholder seen in the wind columns; RH(%) has a minimum of
# -999.0 in describe(), which is treated as a placeholder too
# (NOTE(review): confirm against the data set documentation).
combined_df = combined_df.replace([-999.9, -999.0], np.nan)
combined_df
| concentration_van0 | concentration_van1 | concentration_van2 | concentration_van3 | concentration_van4 | concentration_van6 | concentration_van7 | concentration_van8 | concentration_van9 | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | |||||||||||||||||
| 2003-06-29 14:00:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.6 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 2003-06-29 14:00:00.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.5 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:01.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:01.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:02.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1024.9 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2003-07-29 09:29:58.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5387.0 | 508.1 | 266.4 | 323.2 | 11.0 | 0.5 | NaN | NaN |
| 2003-07-29 09:29:58.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:29:59.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:29:59.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:30:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5391.0 | 507.9 | 266.4 | 323.3 | 11.0 | 0.5 | NaN | NaN |
432040 rows × 17 columns
# Number of missing values in each column after placeholder replacement.
combined_df.isnull().sum()
concentration_van0 0 concentration_van1 0 concentration_van2 0 concentration_van3 0 concentration_van4 0 concentration_van6 0 concentration_van7 0 concentration_van8 0 concentration_van9 0 HGHT(m) 357270 PRESS(hPa) 357270 Temp(K) 357270 Theta(K) 357285 RH(%) 357270 MXRAT(g/kg) 357315 WSPD(m/s) 426810 WDIR(Deg) 426810 dtype: int64
# Summary statistics after placeholder replacement, one row per column.
combined_df.describe().transpose()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| concentration_van0 | 432040.0 | 1311.294700 | 3689.729434 | 0.0 | 0.0 | 0.0 | 1050.000 | 27848.5 |
| concentration_van1 | 432040.0 | 1120.203635 | 2372.694842 | 0.0 | 0.0 | 0.0 | 1110.325 | 16544.5 |
| concentration_van2 | 432040.0 | 756.806867 | 1797.131831 | 0.0 | 0.0 | 0.0 | 1006.100 | 14497.2 |
| concentration_van3 | 432040.0 | 1315.709074 | 2559.567178 | 0.0 | 0.0 | 0.0 | 1054.400 | 19173.3 |
| concentration_van4 | 432040.0 | 2263.084441 | 5303.310272 | 0.0 | 0.0 | 0.0 | 1234.900 | 30530.2 |
| concentration_van6 | 432040.0 | 2074.409610 | 4963.335336 | 0.0 | 0.0 | 0.0 | 1156.500 | 25039.5 |
| concentration_van7 | 432040.0 | 2134.499303 | 4869.897250 | 0.0 | 0.0 | 0.0 | 1214.200 | 27485.4 |
| concentration_van8 | 432040.0 | 1331.059029 | 2500.172832 | 0.0 | 0.0 | 0.0 | 1110.000 | 20036.3 |
| concentration_van9 | 432040.0 | 903.633411 | 2396.595616 | 0.0 | 0.0 | 0.0 | 1002.400 | 28821.7 |
| HGHT(m) | 74770.0 | 4516.052789 | 3572.363329 | 0.0 | 1824.0 | 3606.0 | 6107.750 | 29153.0 |
| PRESS(hPa) | 74770.0 | 611.000861 | 222.190875 | 22.9 | 464.5 | 637.3 | 789.100 | 978.5 |
| Temp(K) | 74770.0 | 272.504809 | 24.342725 | 196.0 | 262.3 | 278.6 | 290.800 | 405.6 |
| Theta(K) | 74755.0 | 321.023759 | 14.597956 | 250.2 | 311.5 | 316.7 | 326.900 | 764.5 |
| RH(%) | 74770.0 | -40.991320 | 280.819979 | -999.0 | 14.0 | 39.0 | 57.000 | 100.0 |
| MXRAT(g/kg) | 74725.0 | 5.624826 | 6.423727 | 0.0 | 0.7 | 4.4 | 9.200 | 97.4 |
| WSPD(m/s) | 5230.0 | 8.419828 | 3.056892 | 1.0 | 5.9 | 9.4 | 10.700 | 12.9 |
| WDIR(Deg) | 5230.0 | 217.083939 | 106.624875 | 1.0 | 148.0 | 267.0 | 283.000 | 360.0 |
# Write the joined TGA + weather table to the current working directory.
combined_df.to_csv('combined_df.csv')

# Switch to the profile-data directory and load the sodar workbook
# (first sheet, pandas defaults).
path = '/Users/junyuyao/Desktop/德州奥斯汀/G14 summer/cs data research/JointUrban2003/Profile Data'
os.chdir(path)
sodar_workbook = 'pnnl_sodar_day.xls'
wdsd_df = pd.read_excel(sodar_workbook)
wdsd_df
| ************************************************************************ | Unnamed: 1 | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | ... | Unnamed: 39 | Day188 | Unnamed: 41 | Unnamed: 42 | Unnamed: 43 | Unnamed: 44 | Day190 | Unnamed: 46 | Unnamed: 47 | Unnamed: 48 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | OKC | JOINT | URBAN | 2003 | Field | Experiment | July | 2003 | NaN | NaN | ... | NaN | Day188 | 1800.0 | 1900.0 | 2000.0 | NaN | Day190 | 1800.0 | 1900.0 | 2000.0 |
| 1 | PNNL | Sodar | Data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 6.56 | 40.0 | NaN | NaN | NaN | 5.33 | 30.0 | NaN | NaN |
| 2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 7.71 | 50.0 | NaN | NaN | NaN | 6.53 | 40.0 | NaN | NaN |
| 3 | POC | : | Larry | Berg | <larry.berg@pnl.gov> | NaN | NaN | NaN | NaN | NaN | ... | NaN | 8.02 | 60.0 | NaN | NaN | NaN | 6.84 | 50.0 | NaN | NaN |
| 4 | NaN | Will | Shaw | <will.shaw@pnl.gov> | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 8.38 | 70.0 | NaN | NaN | NaN | 7.6 | 60.0 | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 23097 | 2003197 | 23:45:00.0 | 460 | 11.1 | 7.87 | 0.9 | 13.61 | 235 | 0.145 | 0.168 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23098 | 2003197 | 23:45:00.0 | 470 | -9999 | 7.56 | 0.77 | -9999 | -9999 | 0.212 | 0.212 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23099 | 2003197 | 23:45:00.0 | 480 | -9999 | 7.32 | 0.75 | -9999 | -9999 | 0.211 | 0.211 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23100 | 2003197 | 23:45:00.0 | 490 | -9999 | 7.09 | 0.75 | -9999 | -9999 | -9999 | 0.213 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23101 | 2003197 | 23:45:00.0 | 500 | -9999 | 6.92 | 0.74 | -9999 | -9999 | -9999 | 0.211 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
23102 rows × 49 columns
# Replace the -9999 values with NaN.
wdsd_df = wdsd_df.replace(to_replace=-9999, value=np.nan)
wdsd_df
| ************************************************************************ | Unnamed: 1 | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | ... | Unnamed: 39 | Day188 | Unnamed: 41 | Unnamed: 42 | Unnamed: 43 | Unnamed: 44 | Day190 | Unnamed: 46 | Unnamed: 47 | Unnamed: 48 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | OKC | JOINT | URBAN | 2003 | Field | Experiment | July | 2003 | NaN | NaN | ... | NaN | Day188 | 1800.0 | 1900.0 | 2000.0 | NaN | Day190 | 1800.0 | 1900.0 | 2000.0 |
| 1 | PNNL | Sodar | Data | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 6.56 | 40.0 | NaN | NaN | NaN | 5.33 | 30.0 | NaN | NaN |
| 2 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 7.71 | 50.0 | NaN | NaN | NaN | 6.53 | 40.0 | NaN | NaN |
| 3 | POC | : | Larry | Berg | <larry.berg@pnl.gov> | NaN | NaN | NaN | NaN | NaN | ... | NaN | 8.02 | 60.0 | NaN | NaN | NaN | 6.84 | 50.0 | NaN | NaN |
| 4 | NaN | Will | Shaw | <will.shaw@pnl.gov> | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | 8.38 | 70.0 | NaN | NaN | NaN | 7.6 | 60.0 | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 23097 | 2003197 | 23:45:00.0 | 460 | 11.1 | 7.87 | 0.9 | 13.61 | 235 | 0.145 | 0.168 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23098 | 2003197 | 23:45:00.0 | 470 | NaN | 7.56 | 0.77 | NaN | NaN | 0.212 | 0.212 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23099 | 2003197 | 23:45:00.0 | 480 | NaN | 7.32 | 0.75 | NaN | NaN | 0.211 | 0.211 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23100 | 2003197 | 23:45:00.0 | 490 | NaN | 7.09 | 0.75 | NaN | NaN | NaN | 0.213 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 23101 | 2003197 | 23:45:00.0 | 500 | NaN | 6.92 | 0.74 | NaN | NaN | NaN | 0.211 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
23102 rows × 49 columns
# Keep rows from position 59 onward (the column-name row and what follows)
# and only the first 11 columns.
wdsd_df = wdsd_df.iloc[59:, :11]
wdsd_df
| ************************************************************************ | Unnamed: 1 | Unnamed: 2 | Unnamed: 3 | Unnamed: 4 | Unnamed: 5 | Unnamed: 6 | Unnamed: 7 | Unnamed: 8 | Unnamed: 9 | Unnamed: 10 | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 59 | Julian | HH:MM:SS.s | Height | U-Mean | V-Mean | W-Mean | WSPD | WDIR | U-SDEV | V-SDEV | W-SDEV |
| 60 | Date | (UTC) | (m) | (m/s) | (m/s) | (m/s) | (m/s) | (deg) | (m/s) | (m/s) | (m/s) |
| 61 | ----------------------------------------------... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 62 | 2003183 | 00:00:00.0 | 30 | 1.84 | 5.35 | -0.47 | 5.66 | 199 | NaN | NaN | 0.208 |
| 63 | 2003183 | 00:00:00.0 | 40 | 2.46 | 1.8 | -0.16 | 3.05 | 234 | 0.312 | 0.375 | 0.17 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 23097 | 2003197 | 23:45:00.0 | 460 | 11.1 | 7.87 | 0.9 | 13.61 | 235 | 0.145 | 0.168 | 0.182 |
| 23098 | 2003197 | 23:45:00.0 | 470 | NaN | 7.56 | 0.77 | NaN | NaN | 0.212 | 0.212 | 0.265 |
| 23099 | 2003197 | 23:45:00.0 | 480 | NaN | 7.32 | 0.75 | NaN | NaN | 0.211 | 0.211 | 0.264 |
| 23100 | 2003197 | 23:45:00.0 | 490 | NaN | 7.09 | 0.75 | NaN | NaN | NaN | 0.213 | 0.267 |
| 23101 | 2003197 | 23:45:00.0 | 500 | NaN | 6.92 | 0.74 | NaN | NaN | NaN | 0.211 | 0.264 |
23043 rows × 11 columns
# Use the first remaining row (the variable names) as the column labels.
wdsd_df.columns = wdsd_df.iloc[0]
# Skip the three non-data rows at the top (names, units, dashed separator),
# discard rows in which every cell is NaN, and renumber the rows from zero.
wdsd_df = wdsd_df.iloc[3:].dropna(how='all').reset_index(drop=True)
wdsd_df
| 59 | Julian | HH:MM:SS.s | Height | U-Mean | V-Mean | W-Mean | WSPD | WDIR | U-SDEV | V-SDEV | W-SDEV |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2003183 | 00:00:00.0 | 30 | 1.84 | 5.35 | -0.47 | 5.66 | 199 | NaN | NaN | 0.208 |
| 1 | 2003183 | 00:00:00.0 | 40 | 2.46 | 1.8 | -0.16 | 3.05 | 234 | 0.312 | 0.375 | 0.17 |
| 2 | 2003183 | 00:00:00.0 | 50 | 2.97 | 2 | -0.13 | 3.58 | 236 | 0.337 | 0.3 | 0.169 |
| 3 | 2003183 | 00:00:00.0 | 60 | 2.97 | 2.25 | -0.01 | 3.72 | 233 | 0.338 | 0.196 | 0.171 |
| 4 | 2003183 | 00:00:00.0 | 70 | 2.9 | 2.39 | 0.05 | 3.76 | 231 | 0.255 | 0.202 | 0.209 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 23035 | 2003197 | 23:45:00.0 | 460 | 11.1 | 7.87 | 0.9 | 13.61 | 235 | 0.145 | 0.168 | 0.182 |
| 23036 | 2003197 | 23:45:00.0 | 470 | NaN | 7.56 | 0.77 | NaN | NaN | 0.212 | 0.212 | 0.265 |
| 23037 | 2003197 | 23:45:00.0 | 480 | NaN | 7.32 | 0.75 | NaN | NaN | 0.211 | 0.211 | 0.264 |
| 23038 | 2003197 | 23:45:00.0 | 490 | NaN | 7.09 | 0.75 | NaN | NaN | NaN | 0.213 | 0.267 |
| 23039 | 2003197 | 23:45:00.0 | 500 | NaN | 6.92 | 0.74 | NaN | NaN | NaN | 0.211 | 0.264 |
23040 rows × 11 columns
# Parse "<year><day-of-year>" + "HH:MM:SS.s" into timestamps and use them
# as the row index (the previous positional index is not kept).
wdsd_df['datetime'] = pd.to_datetime(
    wdsd_df['Julian'].astype(str) + wdsd_df['HH:MM:SS.s'],
    format='%Y%j%H:%M:%S.%f',
)
wdsd_df = wdsd_df.set_index('datetime')
wdsd_df
| 59 | Julian | HH:MM:SS.s | Height | U-Mean | V-Mean | W-Mean | WSPD | WDIR | U-SDEV | V-SDEV | W-SDEV |
|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | |||||||||||
| 2003-07-02 00:00:00 | 2003183 | 00:00:00.0 | 30 | 1.84 | 5.35 | -0.47 | 5.66 | 199 | NaN | NaN | 0.208 |
| 2003-07-02 00:00:00 | 2003183 | 00:00:00.0 | 40 | 2.46 | 1.8 | -0.16 | 3.05 | 234 | 0.312 | 0.375 | 0.17 |
| 2003-07-02 00:00:00 | 2003183 | 00:00:00.0 | 50 | 2.97 | 2 | -0.13 | 3.58 | 236 | 0.337 | 0.3 | 0.169 |
| 2003-07-02 00:00:00 | 2003183 | 00:00:00.0 | 60 | 2.97 | 2.25 | -0.01 | 3.72 | 233 | 0.338 | 0.196 | 0.171 |
| 2003-07-02 00:00:00 | 2003183 | 00:00:00.0 | 70 | 2.9 | 2.39 | 0.05 | 3.76 | 231 | 0.255 | 0.202 | 0.209 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2003-07-16 23:45:00 | 2003197 | 23:45:00.0 | 460 | 11.1 | 7.87 | 0.9 | 13.61 | 235 | 0.145 | 0.168 | 0.182 |
| 2003-07-16 23:45:00 | 2003197 | 23:45:00.0 | 470 | NaN | 7.56 | 0.77 | NaN | NaN | 0.212 | 0.212 | 0.265 |
| 2003-07-16 23:45:00 | 2003197 | 23:45:00.0 | 480 | NaN | 7.32 | 0.75 | NaN | NaN | 0.211 | 0.211 | 0.264 |
| 2003-07-16 23:45:00 | 2003197 | 23:45:00.0 | 490 | NaN | 7.09 | 0.75 | NaN | NaN | NaN | 0.213 | 0.267 |
| 2003-07-16 23:45:00 | 2003197 | 23:45:00.0 | 500 | NaN | 6.92 | 0.74 | NaN | NaN | NaN | 0.211 | 0.264 |
23040 rows × 11 columns
# Keep only Height, WSPD and WDIR: the raw date/time strings and the
# per-component means and standard deviations are removed.
wdsd_df = wdsd_df.drop(
    columns=[
        'Julian',
        'HH:MM:SS.s',
        'U-Mean',
        'V-Mean',
        'W-Mean',
        'U-SDEV',
        'V-SDEV',
        'W-SDEV',
    ]
)
wdsd_df
| 59 | Height | WSPD | WDIR |
|---|---|---|---|
| datetime | |||
| 2003-07-02 00:00:00 | 30 | 5.66 | 199 |
| 2003-07-02 00:00:00 | 40 | 3.05 | 234 |
| 2003-07-02 00:00:00 | 50 | 3.58 | 236 |
| 2003-07-02 00:00:00 | 60 | 3.72 | 233 |
| 2003-07-02 00:00:00 | 70 | 3.76 | 231 |
| ... | ... | ... | ... |
| 2003-07-16 23:45:00 | 460 | 13.61 | 235 |
| 2003-07-16 23:45:00 | 470 | NaN | NaN |
| 2003-07-16 23:45:00 | 480 | NaN | NaN |
| 2003-07-16 23:45:00 | 490 | NaN | NaN |
| 2003-07-16 23:45:00 | 500 | NaN | NaN |
23040 rows × 3 columns
# Count the missing (NaN) entries in each remaining column.
wdsd_df.isna().sum()
59 Height 0 WSPD 3081 WDIR 3081 dtype: int64
pwg = 'additional wind data'
os.chdir(pwg)
# visualize the amount of data available
# The files are numbered 01..14 with a zero-padded two-digit suffix.
# Every pass rebinds ad_wind_df, so only the last file read is kept.
for i in range(1, 15):
    ad_wind_df = pd.read_csv(f'dpg_pwids{i:02d}.csv')
ad_wind_df.head(5000)
/Users/junyuyao/opt/anaconda3/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3444: DtypeWarning: Columns (0,2,3,4,5,6) have mixed types.Specify dtype option on import or set low_memory=False. exec(code_obj, self.user_global_ns, self.user_ns)
| Julian | HH:MM:SS.s | WSPD | WDIR | TEMP | RH | QC | Unnamed: 7 | |
|---|---|---|---|---|---|---|---|---|
| 0 | Date | (UTC) | (m/s) | (Deg) | (C) | (%) | FLAG | NaN |
| 1 | 2003180 | 00:00:00.0 | 3.90 | 115.1 | 30.02 | 44.63 | 0000 | NaN |
| 2 | 2003180 | 00:00:10.0 | 3.78 | 131.9 | 29.97 | 44.74 | 0000 | NaN |
| 3 | 2003180 | 00:00:20.0 | 3.02 | 116.9 | 29.97 | 44.95 | 0000 | NaN |
| 4 | 2003180 | 00:00:30.0 | 4.28 | 132.8 | 29.95 | 44.91 | 0000 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4995 | 2003180 | 13:52:20.0 | 2.45 | 229.7 | 23.41 | 70.40 | 0000 | NaN |
| 4996 | 2003180 | 13:52:30.0 | 1.57 | 239.6 | 23.45 | 70.50 | 0000 | NaN |
| 4997 | 2003180 | 13:52:40.0 | 2.06 | 229.1 | 23.47 | 70.40 | 0000 | NaN |
| 4998 | 2003180 | 13:52:50.0 | 1.42 | 227.4 | 23.44 | 70.50 | 0000 | NaN |
| 4999 | 2003180 | 13:53:00.0 | 1.13 | 235.9 | 23.45 | 70.40 | 0000 | NaN |
5000 rows × 8 columns
# Treat the numeric value -9999 as missing data.
# NOTE(review): the DtypeWarning printed when the file was read reports mixed
# column types (the first row holds unit strings), so a value stored as the
# string '-9999' would not be matched by this replacement - confirm.
ad_wind_df = ad_wind_df.replace(-9999, np.nan)
# Number of missing wind-speed entries after the replacement.
ad_wind_df['WSPD'].isna().sum()
0
# Switch the working directory back to the TGA data folder.
pwd = '/Users/junyuyao/Desktop/德州奥斯汀/G14 summer/cs data research/JointUrban2003/fast response sf6/TGA'
os.chdir(pwd)
# NOTE(review): the path given to read_csv is an empty string, so no file can
# be located as written - fill in the intended CSV file name.
df183 = pd.read_csv('')
| concentration_van0 | concentration_van1 | concentration_van2 | concentration_van3 | concentration_van4 | concentration_van6 | concentration_van7 | concentration_van8 | concentration_van9 | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| datetime | |||||||||||||||||
| 2003-06-29 14:00:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.6 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 2003-06-29 14:00:00.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1030.5 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:01.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:01.500 | 0.0 | 0.0 | 0.0 | 0.0 | 1033.7 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-06-29 14:00:02.000 | 0.0 | 0.0 | 0.0 | 0.0 | 1024.9 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2003-07-29 09:29:58.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5387.0 | 508.1 | 266.4 | 323.2 | 11.0 | 0.5 | NaN | NaN |
| 2003-07-29 09:29:58.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:29:59.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:29:59.500 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2003-07-29 09:30:00.000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5391.0 | 507.9 | 266.4 | 323.3 | 11.0 | 0.5 | NaN | NaN |
432040 rows × 17 columns
# Peek at the second label of combined_df's index.
combined_df.index[1]
Timestamp('2003-06-29 14:00:00.500000')
# Heights in combined_df are compared as stored by default. Pass
# height_offset=361 to compare against altitude instead (height + 361 m).
def find_temp(altitude, height_offset=0.0):
    """Collect the wind speed at rows of ``combined_df`` matching *altitude*.

    The rows of the module-level ``combined_df`` are scanned in order. A row
    matches when ``HGHT(m) + height_offset`` equals *altitude*. After a match,
    further matches are ignored until a row with a NaN ``WSPD(m/s)`` value has
    been seen; that row re-arms the search for the rows that follow it.

    The earlier version used ``while key:`` around the comparison, which never
    terminated as soon as a row's height differed from *altitude*.

    Args:
        altitude: Height value to look for.
        height_offset: Constant added to every ``HGHT(m)`` value before the
            comparison. Defaults to 0 (heights used as stored).

    Returns:
        A tuple ``(times, temps_low, temps_high)`` of equally long lists:
        the index labels of the matching rows and, twice, the ``WSPD(m/s)``
        values of those rows (both lists receive the same value).
    """
    # Pull the columns out once; per-element Series lookups on a
    # datetime-indexed frame are very slow over hundreds of thousands of rows.
    heights = combined_df['HGHT(m)'].to_numpy(dtype=float) + height_offset
    speeds = combined_df['WSPD(m/s)'].to_numpy(dtype=float)
    stamps = combined_df.index

    times = []
    temps_low = []
    temps_high = []
    searching = True
    for pos, (height, speed) in enumerate(zip(heights, speeds)):
        if searching and height == altitude:
            temps_high.append(speed)
            temps_low.append(speed)
            times.append(stamps[pos])
            searching = False
        # A row without a wind-speed value re-arms the search.
        if np.isnan(speed):
            searching = True
    return times, temps_low, temps_high
# Heights to sample: 0 to 45 in steps of 5.
heights = list(range(0, 50, 5))
print('Heights =', heights)
# For every height, gather the matching samples returned by find_temp and
# show them as a data frame. lst_df_temps is the list meant to hold them.
lst_df_temps = []
for altitude in heights:
    times, temps_low, temps_high = find_temp(altitude)
    df_temps = pd.DataFrame(
        {'UTC': times, 'Low': temps_low, 'High': temps_high}
    )
    print(df_temps)
Heights = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45]
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) /var/folders/vv/lx8h54050gx2ndymzg8v1gm40000gn/T/ipykernel_25467/4264331609.py in <module> 7 lst_df_temps = [] 8 for e in heights: ----> 9 times, temps_low, temps_high = find_temp(e) 10 df_temps = pd.DataFrame({'UTC':times, 'Low':temps_low, 'High':temps_high}) 11 print(df_temps) /var/folders/vv/lx8h54050gx2ndymzg8v1gm40000gn/T/ipykernel_25467/1414482129.py in find_temp(altitude) 13 14 while(key): ---> 15 true_height = combined_df['HGHT(m)'][i] #+ 361 # height is from ground, started at altitude of 361 m 16 if true_height == altitude: 17 temps_high.append(combined_df['WSPD(m/s)'][i]) ~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/series.py in __getitem__(self, key) 936 key = unpack_1tuple(key) 937 --> 938 if is_integer(key) and self.index._should_fallback_to_positional(): 939 return self._values[key] 940 KeyboardInterrupt:
def temp_func_generator(df_temp, i):
    """Return the straight-line interpolant between rows *i* and *i + 1*.

    The two 'UTC' values are multiplied by 36 (described in the original code
    as a conversion to seconds) and paired with the two 'Avg' values to define
    a line. The returned callable evaluates that line at a given time and
    rounds the result to one decimal place.

    Args:
        df_temp: Frame with 'UTC' and 'Avg' columns, addressable by the
            labels *i* and *i + 1*.
        i: Label of the row that starts the segment.

    Returns:
        A one-argument function mapping a time to the rounded line value.
    """
    utc = df_temp['UTC']
    avg = df_temp['Avg']
    t_end = utc[i + 1] * 36
    t_start = utc[i] * 36
    v_end = avg[i + 1]
    v_start = avg[i]

    slope = (v_end - v_start) / (t_end - t_start)
    intercept = v_start - (slope * t_start)

    def temp(time):
        return round((slope * time) + intercept, 1)

    return temp
def create_all_temps(df_temp, time_interval):
    """Sample the piecewise-linear profile of *df_temp* at a fixed step.

    One interpolant is built per pair of consecutive rows with
    ``temp_func_generator``. Each segment is sampled from its start time
    (inclusive) to its end time (exclusive) every *time_interval*, where the
    times are the 'UTC' values multiplied by 36.

    Args:
        df_temp: Frame with 'UTC' and 'Avg' columns.
        time_interval: Step between consecutive sample times.

    Returns:
        A tuple ``(times, temps)`` of equally long lists: the sample times
        and the interpolated values at those times.
    """
    segment_funcs = [
        temp_func_generator(df_temp, k) for k in range(len(df_temp) - 1)
    ]
    times = []
    temps = []
    for k, interpolate in enumerate(segment_funcs):
        seg_start = df_temp['UTC'][k] * 36
        seg_end = df_temp['UTC'][k + 1] * 36
        segment_times = list(np.arange(seg_start, seg_end, time_interval))
        temps.extend(interpolate(t) for t in segment_times)
        times.extend(segment_times)
    return times, temps
# Mean of the low and high value of every sample, added as the last column.
temps_avg = [(low + high) / 2 for low, high in zip(temps_low, temps_high)]
df_temps.insert(df_temps.shape[1], 'Avg', temps_avg)
# Store an independent (deep) copy of the frame in the list.
lst_df_temps.append(df_temps.copy())
# Show the current working directory.
print(os.getcwd())
/Users/junyuyao/Desktop/德州奥斯汀/G14 summer/cs data research/JointUrban2003/fast response sf6/TGA
# Tally how often each (wind speed, wind direction) pair occurs.
df1[['WSPD(m/s)','WDIR(Deg)']].value_counts()
# WSPD and WDIR are missing too much data;
# it is not likely that they will provide sufficient information through training
WSPD(m/s) WDIR(Deg)
-999.9 -999.9 111171
9.5 273.0 42
12.5 290.0 41
11.4 282.0 38
12.4 290.0 27
...
4.9 90.0 1
8.2 357.0 1
8.3 7.0 1
18.0 1
8.8 274.0 1
Length: 1671, dtype: int64
# Remove every row whose wind speed carries the -999.9 missing-data marker.
df2 = df1.drop(index=df1.index[df1['WSPD(m/s)'] == -999.9])
df2.head()
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 1109 | 2003180.0 | 15:15:00 | 0.0 | 0.0 | 976.1 | 298.2 | 300.3 | 60.0 | 12.4 | 2.0 | 225.0 |
| 2028 | 2003180.0 | 16:06:00 | 0.0 | 0.0 | 975.9 | 300.0 | 302.1 | 50.0 | 11.5 | 3.0 | 180.0 |
| 4255 | 2003180.0 | 18:32:00 | 0.0 | 0.0 | 975.0 | 299.7 | 301.9 | 57.0 | 12.9 | 5.0 | 90.0 |
| 4256 | 2003180.0 | 18:32:00 | 8.0 | 12.0 | 973.5 | 299.2 | 301.5 | 57.0 | 12.5 | 4.1 | 100.0 |
# 2-D histogram of wind direction (x axis) against wind speed (y axis).
# The data arguments are ordered to agree with the axis labels below; the
# earlier version passed speed as x and direction as y, so the labels were
# attached to the wrong axes.
plt.hist2d(df2['WDIR(Deg)'], df2['WSPD(m/s)'], bins=(50, 50), vmax=20)
plt.colorbar()
plt.xlabel('Wind Direction [deg]')
plt.ylabel('Wind Velocity [m/s]')
Text(0, 0.5, 'Wind Velocity [m/s]')
# Take speed and direction from df2 itself. The earlier version popped them
# out of the separate frame `df`, which both mutated that frame and relied on
# its index lining up with df2's.
wv = df2['WSPD(m/s)']
# Convert to radians.
wd_rad = df2['WDIR(Deg)'] * np.pi / 180
# Calculate the wind x and y components.
df2['Wx'] = wv * np.cos(wd_rad)
df2['Wy'] = wv * np.sin(wd_rad)
df2
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | Wx | Wy | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 | -1.879385e+00 | -6.840403e-01 |
| 1109 | 2003180.0 | 15:15:00 | 0.0 | 0.0 | 976.1 | 298.2 | 300.3 | 60.0 | 12.4 | 2.0 | 225.0 | -1.414214e+00 | -1.414214e+00 |
| 2028 | 2003180.0 | 16:06:00 | 0.0 | 0.0 | 975.9 | 300.0 | 302.1 | 50.0 | 11.5 | 3.0 | 180.0 | -3.000000e+00 | 3.673940e-16 |
| 4255 | 2003180.0 | 18:32:00 | 0.0 | 0.0 | 975.0 | 299.7 | 301.9 | 57.0 | 12.9 | 5.0 | 90.0 | 3.061617e-16 | 5.000000e+00 |
| 4256 | 2003180.0 | 18:32:00 | 8.0 | 12.0 | 973.5 | 299.2 | 301.5 | 57.0 | 12.5 | 4.1 | 100.0 | -7.119575e-01 | 4.037712e+00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 104002 | 2003210.0 | 03:01:00 | 0.0 | 0.0 | 970.6 | 305.5 | 308.1 | 40.0 | 12.7 | 4.0 | 150.0 | -3.464102e+00 | 2.000000e+00 |
| 105168 | 2003210.0 | 03:59:00 | 0.0 | 0.0 | 970.6 | 304.3 | 306.9 | 44.0 | 13.1 | 2.0 | 135.0 | -1.414214e+00 | 1.414214e+00 |
| 108312 | 2003210.0 | 06:00:00 | 0.0 | 0.0 | 970.2 | 302.4 | 305.0 | 49.0 | 13.1 | 2.0 | 220.0 | -1.532089e+00 | -1.285575e+00 |
| 111313 | 2003210.0 | 08:00:00 | 0.0 | 0.0 | 970.3 | 301.6 | 304.2 | 50.0 | 12.7 | 1.0 | 220.0 | -7.660444e-01 | -6.427876e-01 |
| 112592 | 2003210.0 | 09:01:00 | 0.0 | 0.0 | 970.0 | 300.3 | 302.9 | 52.0 | 12.2 | 1.0 | 220.0 | -7.660444e-01 | -6.427876e-01 |
4486 rows × 13 columns
# 2-D histogram of the wind x component against the wind y component,
# 50 bins per axis, with the colour scale capped at 13.
plt.hist2d(df2['Wx'], df2['Wy'], bins=(50, 50), vmax=13)
plt.colorbar()
plt.xlabel('Wind X [m/s]')
plt.ylabel('Wind Y [m/s]')
# Fit the axis limits tightly around the data.
ax = plt.gca()
ax.axis('tight')
(-10.0, 12.829332450250726, -12.431523692103417, 5.985384301558945)
# Print the maximum, then the minimum, of each wind component in turn.
for component in ('Wx', 'Wy'):
    print(df2[component].max())
    print(df2[component].min())
12.829332450250726 -10.0 5.985384301558945 -12.431523692103417
# Tally how often each (Temp, PRESS) pair occurs.
df1[['Temp','PRESS']].value_counts()
Temp PRESS
304.9 960.2 6
290.1 780.1 6
286.8 736.8 6
250.9 368 5
265.8 494 5
..
260.4 454.1 1
453.5 1
453.4 1
452.6 1
(K) (hPa) 1
Length: 96799, dtype: int64
# Remove rows in which either the temperature or the height carries the
# -999.9 missing-data marker. Both conditions are applied together: the
# earlier version filtered df1 twice in separate assignments, so the second
# assignment discarded the temperature filter.
missing_temp_or_height = (df1['Temp(K)'] == -999.9) | (df1['HGHT(m)'] == -999.9)
df3 = df1.drop(index=df1.index[missing_temp_or_height])
df3
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 |
| 3 | 2003180.0 | 14:00:00 | 2.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 |
| 4 | 2003180.0 | 14:00:00 | 4.0 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 |
| 5 | 2003180.0 | 14:00:00 | 6.0 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 |
| 6 | 2003180.0 | 14:00:00 | 8.0 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115722 | 2003210.0 | 09:01:00 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115723 | 2003210.0 | 09:01:00 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115724 | 2003210.0 | 09:01:00 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115725 | 2003210.0 | 09:01:00 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 |
| 115726 | 2003210.0 | 09:01:00 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 |
115657 rows × 11 columns
# Try to use known formulas to obtain a temperature normalized to a fixed
# height (0 m): add 6.49 K per 1000 m of height to the measured temperature
# (presumably a standard-atmosphere lapse rate - confirm).
# Computed column-wise instead of with a row-by-row apply; the arithmetic and
# its order are unchanged.
df3['norm_Temp'] = df3['HGHT(m)'] / 1000 * 6.49 + df3['Temp(K)']
df3
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | norm_Temp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | 2003180.0 | 14:00:00 | 0.0 | 0.0 | 975.8 | 296.7 | 298.8 | 67.0 | 12.7 | 2.0 | 200.0 | 296.70000 |
| 3 | 2003180.0 | 14:00:00 | 2.0 | 5.0 | 975.4 | 296.7 | 298.8 | 67.0 | 12.7 | -999.9 | -999.9 | 296.73245 |
| 4 | 2003180.0 | 14:00:00 | 4.0 | 12.0 | 974.4 | 296.2 | 298.4 | 68.0 | 12.5 | -999.9 | -999.9 | 296.27788 |
| 5 | 2003180.0 | 14:00:00 | 6.0 | 18.0 | 973.7 | 296.0 | 298.3 | 69.0 | 12.5 | -999.9 | -999.9 | 296.11682 |
| 6 | 2003180.0 | 14:00:00 | 8.0 | 29.0 | 972.5 | 295.8 | 298.2 | 70.0 | 12.6 | -999.9 | -999.9 | 295.98821 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115722 | 2003210.0 | 09:01:00 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 | 347.10303 |
| 115723 | 2003210.0 | 09:01:00 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 | 347.26144 |
| 115724 | 2003210.0 | 09:01:00 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 | 347.31336 |
| 115725 | 2003210.0 | 09:01:00 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 | 347.23932 |
| 115726 | 2003210.0 | 09:01:00 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 | 347.13932 |
115657 rows × 12 columns
# Show the last 200 rows of df3.
df3.tail(200)
| Julian (Date) | HH:MM:SS.s(UTC) | TIME (sec) | HGHT(m) | PRESS(hPa) | Temp(K) | Theta(K) | RH(%) | MXRAT(g/kg) | WSPD(m/s) | WDIR(Deg) | norm_Temp | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 115527 | 2003210.0 | 09:01:00 | 5934.0 | 18930.0 | 64.5 | 211.7 | 463.1 | 2.0 | 0.0 | -999.9 | -999.9 | 334.55570 |
| 115528 | 2003210.0 | 09:01:00 | 5936.0 | 18936.0 | 64.5 | 211.7 | 463.1 | 2.0 | 0.0 | -999.9 | -999.9 | 334.59464 |
| 115529 | 2003210.0 | 09:01:00 | 5938.0 | 18941.0 | 64.4 | 211.8 | 463.6 | 2.0 | 0.0 | -999.9 | -999.9 | 334.72709 |
| 115530 | 2003210.0 | 09:01:00 | 5940.0 | 18949.0 | 64.3 | 211.8 | 463.8 | 2.0 | 0.0 | -999.9 | -999.9 | 334.77901 |
| 115531 | 2003210.0 | 09:01:00 | 5942.0 | 18954.0 | 64.3 | 212.0 | 464.2 | 2.0 | 0.0 | -999.9 | -999.9 | 335.01146 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 115722 | 2003210.0 | 09:01:00 | 6324.0 | 20247.0 | 52.3 | 215.7 | 501.0 | 1.0 | 0.0 | -999.9 | -999.9 | 347.10303 |
| 115723 | 2003210.0 | 09:01:00 | 6326.0 | 20256.0 | 52.2 | 215.8 | 501.5 | 1.0 | 0.0 | -999.9 | -999.9 | 347.26144 |
| 115724 | 2003210.0 | 09:01:00 | 6328.0 | 20264.0 | 52.1 | 215.8 | 501.8 | 1.0 | 0.0 | -999.9 | -999.9 | 347.31336 |
| 115725 | 2003210.0 | 09:01:00 | 6330.0 | 20268.0 | 52.1 | 215.7 | 501.6 | 1.0 | 0.0 | -999.9 | -999.9 | 347.23932 |
| 115726 | 2003210.0 | 09:01:00 | 6332.0 | 20268.0 | 52.1 | 215.6 | 501.3 | 1.0 | 0.0 | -999.9 | -999.9 | 347.13932 |
200 rows × 12 columns